fix: Adding shape distinguishing to the engine cache #3154
Conversation
There are some changes that do not conform to Python style guidelines:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_settings.py 2024-09-11 04:38:03.076285+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_settings.py 2024-09-11 04:38:21.932007+00:00
@@ -112,28 +112,31 @@
lazy_engine_init: bool = LAZY_ENGINE_INIT
cache_built_engines: bool = CACHE_BUILT_ENGINES
reuse_cached_engines: bool = REUSE_CACHED_ENGINES
-_SETTINGS_TO_BE_ENGINE_INVARIANT =(
+_SETTINGS_TO_BE_ENGINE_INVARIANT = (
"enabled_precisions",
"max_aux_streams",
"version_compatible",
"optimization_level",
"disable_tf32",
"sparse_weights",
"make_refittable",
"engine_capability",
- "hardware_compatible",)
+ "hardware_compatible",
+)
-def settings_are_compatible(set_a: CompilationSettings, set_b: CompilationSettings) -> Tuple[bool, Set[str]]:
+def settings_are_compatible(
+ set_a: CompilationSettings, set_b: CompilationSettings
+) -> Tuple[bool, Set[str]]:
incompatible_settings: Set[str] = set()
for field in _SETTINGS_TO_BE_ENGINE_INVARIANT:
- if getattr(set_a, field) != getattr(set_b, field):
- incompatible_settings.add(field)
+ if getattr(set_a, field) != getattr(set_b, field):
+ incompatible_settings.add(field)
if len(incompatible_settings) == 0:
return True, set()
else:
return False, incompatible_settings
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_engine_cache.py 2024-09-11 04:38:03.076285+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_engine_cache.py 2024-09-11 04:38:21.950622+00:00
@@ -12,16 +12,27 @@
from sympy.polys.matrices.dense import Sequence
import torch
from torch._inductor.codecache import FxGraphCachePickler, sha256_hash
from torch.fx.experimental.proxy_tensor import unset_fake_temporarily
-from torch_tensorrt.dynamo._settings import CompilationSettings, _SETTINGS_TO_BE_ENGINE_INVARIANT
+from torch_tensorrt.dynamo._settings import (
+ CompilationSettings,
+ _SETTINGS_TO_BE_ENGINE_INVARIANT,
+)
from torch_tensorrt._Input import Input
_LOGGER: logging.Logger = logging.getLogger(__name__)
-UnpackedCacheHit = Tuple[bytes, List[str], List[str], Tuple[Input], CompilationSettings, Optional[Dict[Any, Any]]]
+UnpackedCacheHit = Tuple[
+ bytes,
+ List[str],
+ List[str],
+ Tuple[Input],
+ CompilationSettings,
+ Optional[Dict[Any, Any]],
+]
+
class BaseEngineCache(ABC):
@abstractmethod
def __init__(
@@ -30,11 +41,15 @@
**kwargs: Any,
) -> None:
pass
@staticmethod
- def get_hash(gm: torch.fx.GraphModule, input_specs: Sequence[Input], settings: CompilationSettings) -> str:
+ def get_hash(
+ gm: torch.fx.GraphModule,
+ input_specs: Sequence[Input],
+ settings: CompilationSettings,
+ ) -> str:
"""Get the hash value of the GraphModule
Args:
gm (torch.fx.GraphModule): GraphModule to hash
@@ -53,11 +68,13 @@
with io.BytesIO() as stream:
input_specs_data = pickle.dumps(input_spec_strs)
input_specs_data = pickletools.optimize(input_specs_data)
input_specs_hash = sha256_hash(input_specs_data)
- invariant_engine_specs = [str(getattr(settings, field)) for field in _SETTINGS_TO_BE_ENGINE_INVARIANT]
+ invariant_engine_specs = [
+ str(getattr(settings, field)) for field in _SETTINGS_TO_BE_ENGINE_INVARIANT
+ ]
with io.BytesIO() as stream:
engine_specs_data = pickle.dumps(invariant_engine_specs)
engine_specs_data = pickletools.optimize(engine_specs_data)
engine_specs_hash = sha256_hash(engine_specs_data)
@@ -87,12 +104,11 @@
bytes: packed blob
"""
settings = copy.deepcopy(compilation_settings)
settings.torch_executed_ops = {
- f"torch.ops.{op.__str__()}"
- for op in settings.torch_executed_ops
+ f"torch.ops.{op.__str__()}" for op in settings.torch_executed_ops
}
return pickle.dumps(
{
"serialized_engine": bytes(serialized_engine),
@@ -122,11 +138,13 @@
unpacked["input_specs"],
unpacked["compilation_settings"],
unpacked["weight_name_map"],
)
- def insert(self, hash: str, entry: UnpackedCacheHit, *args: Any, **kwargs: Any) -> None:
+ def insert(
+ self, hash: str, entry: UnpackedCacheHit, *args: Any, **kwargs: Any
+ ) -> None:
"""
Insert a cache entry into the engine cache.
Args:
hash (str): The hash value of the GraphModule.
@@ -137,11 +155,10 @@
Returns:
None
"""
packed_cache_info = BaseEngineCache.pack(*entry)
return self.save(hash, packed_cache_info, *args, **kwargs)
-
def check(self, hash: str, *args: Any, **kwargs: Any) -> Optional[UnpackedCacheHit]:
"""
Check if a cache entry exists for the given hash.
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-09-11 04:38:03.076285+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-09-11 04:38:22.587202+00:00
@@ -531,20 +531,35 @@
if self.engine_cache is not None:
if (
self.compilation_settings.cache_built_engines
or self.compilation_settings.reuse_cached_engines
):
- hash_val = self.engine_cache.get_hash(self.module, self.input_specs, self.compilation_settings)
+ hash_val = self.engine_cache.get_hash(
+ self.module, self.input_specs, self.compilation_settings
+ )
if self.compilation_settings.reuse_cached_engines:
# query the cached TRT engine
cached_data = self.engine_cache.check(hash_val)
- if cached_data is not None: # hit the cache
- (serialized_engine, self._input_names, self._output_names, engine_input_specs, engine_compilation_settings, self.weight_name_map) = cached_data
-
- setting_compatiblity, incompattible_settings = settings_are_compatible(self.compilation_settings, engine_compilation_settings)
- assert setting_compatiblity, f"Attempted to refit a prebuilt engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})"
+ if cached_data is not None: # hit the cache
+ (
+ serialized_engine,
+ self._input_names,
+ self._output_names,
+ engine_input_specs,
+ engine_compilation_settings,
+ self.weight_name_map,
+ ) = cached_data
+
+ setting_compatiblity, incompattible_settings = (
+ settings_are_compatible(
+ self.compilation_settings, engine_compilation_settings
+ )
+ )
+ assert (
+ setting_compatiblity
+ ), f"Attempted to refit a prebuilt engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})"
_LOGGER.info(
"Found the cached engine that corresponds to this graph. It is directly loaded."
)
@@ -609,18 +624,21 @@
)
if (
self.engine_cache is not None
and self.compilation_settings.cache_built_engines
):
- self.engine_cache.insert(hash_val, (
- serialized_engine,
- self._input_names,
- self._output_names,
- self.input_specs,
- self.compilation_settings,
- self.weight_name_map,
- ))
+ self.engine_cache.insert(
+ hash_val,
+ (
+ serialized_engine,
+ self._input_names,
+ self._output_names,
+ self.input_specs,
+ self.compilation_settings,
+ self.weight_name_map,
+ ),
+ )
with io.BytesIO() as engine_bytes:
engine_bytes.write(serialized_engine)
engine_str = engine_bytes.getvalue()
--- /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/models/test_engine_cache.py 2024-09-11 04:38:03.108285+00:00
+++ /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/models/test_engine_cache.py 2024-09-11 04:38:26.801121+00:00
@@ -16,11 +16,10 @@
from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity
assertions = unittest.TestCase()
-
class MyEngineCache(BaseEngineCache):
def __init__(
self,
engine_cache_dir: str,
) -> None:
@@ -55,111 +54,125 @@
blob = f.read()
self.hashes[hash] += 1
return blob
return None
+
class TestHashFunction(TestCase):
def test_reexport_is_equal(self):
pyt_model = models.resnet18(pretrained=True).eval().to("cuda")
example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),)
batch = torch.export.Dim("batch", min=1, max=200)
exp_program1 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs1 = (torch_trt.Input(min_shape=(1, 3, 224, 224), opt_shape=(100, 3, 224, 224), max_shape=(200, 3, 224, 224)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs1 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 224, 224),
+ opt_shape=(100, 3, 224, 224),
+ max_shape=(200, 3, 224, 224),
+ ),
+ )
settings1 = CompilationSettings(
- make_refittable=True,
- cache_built_engines=True,
- reuse_cached_engines=True
+ make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1)
exp_program2 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs2 = (torch_trt.Input(min_shape=(1, 3, 224, 224), opt_shape=(100, 3, 224, 224), max_shape=(200, 3, 224, 224)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs2 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 224, 224),
+ opt_shape=(100, 3, 224, 224),
+ max_shape=(200, 3, 224, 224),
+ ),
+ )
settings2 = CompilationSettings(
- make_refittable=True,
- cache_built_engines=True,
- reuse_cached_engines=True
+ make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2)
self.assertEqual(hash1, hash2)
-
-
def test_input_shape_change_is_not_equal(self):
pyt_model = models.resnet18(pretrained=True).eval().to("cuda")
example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),)
batch = torch.export.Dim("batch", min=1, max=200)
exp_program1 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs1 = (torch_trt.Input(min_shape=(1, 3, 224, 224), opt_shape=(100, 3, 224, 224), max_shape=(200, 3, 224, 224)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs1 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 224, 224),
+ opt_shape=(100, 3, 224, 224),
+ max_shape=(200, 3, 224, 224),
+ ),
+ )
settings1 = CompilationSettings(
- make_refittable=True,
- cache_built_engines=True,
- reuse_cached_engines=True
+ make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1)
exp_program2 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs2 = (torch_trt.Input(min_shape=(1, 3, 300, 300), opt_shape=(100, 3, 300, 300), max_shape=(200, 3, 300, 300)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs2 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 300, 300),
+ opt_shape=(100, 3, 300, 300),
+ max_shape=(200, 3, 300, 300),
+ ),
+ )
settings2 = CompilationSettings(
- make_refittable=True,
- cache_built_engines=True,
- reuse_cached_engines=True
+ make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2)
self.assertNotEqual(hash1, hash2)
-
def test_engine_settings_is_not_equal(self):
pyt_model = models.resnet18(pretrained=True).eval().to("cuda")
example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),)
batch = torch.export.Dim("batch", min=1, max=200)
exp_program1 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs1 = (torch_trt.Input(min_shape=(1, 3, 224, 224), opt_shape=(100, 3, 224, 224), max_shape=(200, 3, 224, 224)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs1 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 224, 224),
+ opt_shape=(100, 3, 224, 224),
+ max_shape=(200, 3, 224, 224),
+ ),
+ )
settings1 = CompilationSettings(
make_refittable=True,
cache_built_engines=True,
reuse_cached_engines=True,
- enabled_precisions={torch.float32}
+ enabled_precisions={torch.float32},
)
hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1)
exp_program2 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs2 = (torch_trt.Input(min_shape=(1, 3, 300, 300), opt_shape=(100, 3, 300, 300), max_shape=(200, 3, 300, 300)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs2 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 300, 300),
+ opt_shape=(100, 3, 300, 300),
+ max_shape=(200, 3, 300, 300),
+ ),
+ )
settings2 = CompilationSettings(
make_refittable=True,
cache_built_engines=True,
reuse_cached_engines=True,
- enabled_precisions={torch.float32, torch.float16}
+ enabled_precisions={torch.float32, torch.float16},
)
hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2)
self.assertNotEqual(hash1, hash2)
@@ -207,11 +220,11 @@
debug=False,
min_block_size=1,
make_refittable=True,
cache_built_engines=cache_built_engines,
reuse_cached_engines=reuse_cached_engines,
- engine_cache_dir=engine_cache_dir
+ engine_cache_dir=engine_cache_dir,
)
end.record()
torch.cuda.synchronize()
torch._dynamo.reset()
times.append(start.elapsed_time(end))
@@ -288,12 +301,17 @@
assertions.assertTrue(
cos_sim > COSINE_THRESHOLD,
msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
)
- [assertions.assertTrue(count == 1, f"cache was not hit exactly once for entry ({h}, hit: {count})") for h, count in custom_engine_cache.hashes.items()]
-
+ [
+ assertions.assertTrue(
+ count == 1,
+ f"cache was not hit exactly once for entry ({h}, hit: {count})",
+ )
+ for h, count in custom_engine_cache.hashes.items()
+ ]
def test_dynamo_compile_change_input_shape(self):
"""Runs compilation 3 times, the cache should miss each time"""
model = models.resnet18(pretrained=True).eval().to("cuda")
# Mark the dim0 of inputs as dynamic
@@ -303,11 +321,11 @@
shutil.rmtree(engine_cache_dir)
custom_engine_cache = MyEngineCache(engine_cache_dir)
for i in range(3):
- inputs = (torch.rand((4*(i + 1), 3, 224, 224)).to("cuda"),)
+ inputs = (torch.rand((4 * (i + 1), 3, 224, 224)).to("cuda"),)
trt_gm = torch_trt.dynamo.compile(
torch.export.export(model, args=inputs),
inputs=inputs,
use_python_runtime=False,
enabled_precisions={torch.float},
@@ -316,11 +334,16 @@
make_refittable=True,
cache_built_engines=True,
reuse_cached_engines=True,
)
- [assertions.assertTrue(count == 0, f"Unintended cache hit for entry ({h}, hit: {count})") for h, count in custom_engine_cache.hashes.items()]
+ [
+ assertions.assertTrue(
+ count == 0, f"Unintended cache hit for entry ({h}, hit: {count})"
+ )
+ for h, count in custom_engine_cache.hashes.items()
+ ]
@pytest.mark.xfail
def test_torch_compile_with_default_disk_engine_cache(self):
# Custom Engine Cache
model = models.resnet18(pretrained=True).eval().to("cuda")
@@ -360,11 +383,11 @@
"make_refittable": True,
"cache_built_engines": cache_built_engines,
"reuse_cached_engines": reuse_cached_engines,
"engine_cache_dir": engine_cache_dir,
"engine_cache_size": 1 << 30, # 1GB
- "torch_executed_ops": {"torch.ops.aten.relu.default"}
+ "torch_executed_ops": {"torch.ops.aten.relu.default"},
},
)
results.append(compiled_model(*inputs)) # trigger the compilation
end.record()
torch.cuda.synchronize()
@@ -426,11 +449,11 @@
"min_block_size": 1,
"make_refittable": True,
"cache_built_engines": cache_built_engines,
"reuse_cached_engines": reuse_cached_engines,
"custom_engine_cache": custom_engine_cache,
- "torch_executed_ops": {"torch.ops.aten.relu.default"}
+ "torch_executed_ops": {"torch.ops.aten.relu.default"},
},
)
results.append(compiled_model(*inputs)) # trigger the compilation
end.record()
torch.cuda.synchronize()
@@ -447,13 +470,17 @@
assertions.assertTrue(
cos_sim > COSINE_THRESHOLD,
msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
)
- [assertions.assertTrue(count == 1, f"cache was not hit exactly once for entry ({h}, hit: {count})") for h, count in custom_engine_cache.hashes.items()]
-
-
+ [
+ assertions.assertTrue(
+ count == 1,
+ f"cache was not hit exactly once for entry ({h}, hit: {count})",
+ )
+ for h, count in custom_engine_cache.hashes.items()
+ ]
def test_torch_compile_change_input_shape(self):
# Custom Engine Cache
model = models.resnet18(pretrained=True).eval().to("cuda")
@@ -475,10 +502,15 @@
"min_block_size": 1,
"make_refittable": True,
"cache_built_engines": True,
"reuse_cached_engines": True,
"custom_engine_cache": custom_engine_cache,
- "torch_executed_ops": {"torch.ops.aten.relu.default"}
+ "torch_executed_ops": {"torch.ops.aten.relu.default"},
},
)
- [assertions.assertTrue(count == 0, f"Unintended cache hit for entry ({h}, hit: {count})") for h, count in custom_engine_cache.hashes.items()]
+ [
+ assertions.assertTrue(
+ count == 0, f"Unintended cache hit for entry ({h}, hit: {count})"
+ )
+ for h, count in custom_engine_cache.hashes.items()
+ ]
--- /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/models/test_models_export.py 2024-09-11 04:38:03.108285+00:00
+++ /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/models/test_models_export.py 2024-09-11 04:38:26.804639+00:00
@@ -196,11 +196,12 @@
@unittest.skipIf(
torch.cuda.get_device_properties(torch.cuda.current_device()).major < 9,
"FP8 compilation in Torch-TRT is not supported on cards older than Hopper",
)
@unittest.skipIf(
- not importlib.util.find_spec("modelopt"), reason="ModelOpt is necessary to run this test"
+ not importlib.util.find_spec("modelopt"),
+ reason="ModelOpt is necessary to run this test",
)
@pytest.mark.unit
def test_base_fp8(ir):
import modelopt
@@ -244,12 +245,14 @@
reuse_cached_engines=False,
)
outputs_trt = trt_model(input_tensor)
assert torch.allclose(output_pyt, outputs_trt, rtol=1e-3, atol=1e-2)
+
@unittest.skipIf(
- not importlib.util.find_spec("modelopt") or Version(importlib.metadata.version("modelopt")) < Version("0.16.1"),
+ not importlib.util.find_spec("modelopt")
+ or Version(importlib.metadata.version("modelopt")) < Version("0.16.1"),
"modelopt 0.16.1 or later is required Int8 quantization is supported in modelopt since 0.16.1 or later",
)
@pytest.mark.unit
def test_base_int8(ir):
import modelopt
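
For context on the change the lint report covers: the cache key is built by concatenating three digests — one over the graph, one over the stringified input specs, and one over the engine-invariant settings — so changing any of the three produces a different key and therefore a cache miss. The sketch below is a minimal, self-contained illustration of that composition using only the standard library; the names and the graph-hashing shortcut are illustrative, not torch_tensorrt's actual implementation (which hashes the GraphModule via FxGraphCachePickler).

import hashlib
import pickle
import pickletools

def sha256_hash(data: bytes) -> str:
    # Stand-in for torch._inductor.codecache.sha256_hash
    return hashlib.sha256(data).hexdigest()

def get_cache_key(graph_repr: str, input_spec_strs: list, invariant_specs: list) -> str:
    # Illustrative graph hash; the real code pickles the GraphModule itself.
    graph_hash = sha256_hash(graph_repr.encode())
    # Mirror the pickle -> pickletools.optimize -> sha256 steps from the diff
    # for the input shape specs and the engine-invariant settings.
    input_specs_hash = sha256_hash(pickletools.optimize(pickle.dumps(input_spec_strs)))
    engine_specs_hash = sha256_hash(pickletools.optimize(pickle.dumps(invariant_specs)))
    # Concatenation: a change to any component changes the overall key.
    return graph_hash + input_specs_hash + engine_specs_hash

key_224 = get_cache_key(
    "resnet18",
    ["min=(1,3,224,224) opt=(100,3,224,224) max=(200,3,224,224)"],
    ["enabled_precisions={torch.float32}"],
)
key_300 = get_cache_key(
    "resnet18",
    ["min=(1,3,300,300) opt=(100,3,300,300) max=(200,3,300,300)"],
    ["enabled_precisions={torch.float32}"],
)
assert key_224 != key_300  # different shape ranges -> cache miss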
Description
Adds distinguishing based on input shape and compilation settings to the engine cache
Fixes #3148
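
A condensed version of what the new hash tests above verify: hashing the same exported module with a different Input shape range yields a different cache key, so the engine cache misses instead of loading an engine built for other shapes. This sketch assumes a CUDA device and reuses the torch_tensorrt APIs exercised in the tests.

import torch
import torch_tensorrt as torch_trt
import torchvision.models as models
from torch_tensorrt.dynamo._engine_cache import BaseEngineCache
from torch_tensorrt.dynamo._settings import CompilationSettings

model = models.resnet18(pretrained=True).eval().to("cuda")
exp_program = torch.export.export(
    model, args=(torch.randn((100, 3, 224, 224)).to("cuda"),)
)
settings = CompilationSettings(
    make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
specs_224 = (
    torch_trt.Input(
        min_shape=(1, 3, 224, 224),
        opt_shape=(100, 3, 224, 224),
        max_shape=(200, 3, 224, 224),
    ),
)
specs_300 = (
    torch_trt.Input(
        min_shape=(1, 3, 300, 300),
        opt_shape=(100, 3, 300, 300),
        max_shape=(200, 3, 300, 300),
    ),
)

# Same module, different shape ranges: the keys must differ.
hash_224 = BaseEngineCache.get_hash(exp_program.module(), specs_224, settings)
hash_300 = BaseEngineCache.get_hash(exp_program.module(), specs_300, settings)
assert hash_224 != hash_300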